library(RColorBrewer); #brewer.pal(n=8, name = "Dark2") #;display.brewer.pal(n = 8, name = "Dark2")
library(tidyverse); theme_set(theme_classic()) #ggplot2, dplyr, tibble, etc.
library(plotly) #use ggplotly() for interactive plots with scoll-over IDs
library(knitr) #use kable() to make formatted tables
library(kableExtra)
library(Rtsne)
library(glmmTMB)
library(lme4)
library(sjPlot)
library(ggpubr) #ggarrange() figure wraps
library(plyr)
library(ggeffects) #ggpredict
library(rstanarm) #stan models
library(shinystan) #stan model evaluation >launch_shinystan_demo()
library(loo) #loo() to compare fits between bayesian models
library(MuMIn) #dredge, model averaging
library(AICcmodavg) #aictab() for AICc model comparisons
library(coin) #permutation test for ratios with independence_test()
library(janitor) #get_dupes() finds and prints duplicates


Read in mouth color data (mc)


Replace zeros with NAs, sort and filter by age (23-27)

mc=tibble(read.csv("CB-mouthColor.csv", h=T)) #N=225
#replace zeros with NAs in morphometrics 
mc[mc==0] <- NA
#sort by year > family > id
mc <- mc %>% arrange(year, family, id) #N=225
#filter to include only ages 23-27
mc <- mc %>% filter(between(age,23,27)) #N=141
mc <- mc %>% drop_na(sex) #N=138


Identify duplicate records in mc

mc %>% get_dupes(id)
## # A tibble: 8 x 67
##   id        dupe_count   row family  year   age sex   tongY1 palY1 meanY1 tongM1
##   <chr>          <int> <int> <chr>  <int> <int> <chr>  <int> <int>  <int>  <int>
## 1 42 ROWA04          2   118 ROWA04  2004    24 F         60    50     55     80
## 2 42 ROWA04          2   137 ROWA04  2004    24 F         60    50     55     70
## 3 53 ROWA04          2   119 ROWA04  2004    24 F         60    50     55     70
## 4 53 ROWA04          2   138 ROWA04  2004    24 F         50    30     40     60
## 5 64 ROWA04          2   120 ROWA04  2004    24 F         70    40     55     80
## 6 64 ROWA04          2   139 ROWA04  2004    24 F         60    40     50     60
## 7 75 ROWA04          2   121 ROWA04  2004    24 F         60    40     50     70
## 8 75 ROWA04          2   140 ROWA04  2004    24 F         50    50     50     50
## # ... with 56 more variables: palM1 <int>, meanM1 <dbl>, sat1 <dbl>,
## #   hue1 <dbl>, tongY2 <int>, palY2 <int>, meanY2 <int>, tongM2 <int>,
## #   palM2 <int>, meanM2 <dbl>, sat2 <dbl>, hue2 <dbl>, bodTemp1 <dbl>,
## #   bodTemp2 <dbl>, diffBodTemp <dbl>, ambTemp1 <dbl>, ambTemp2 <dbl>,
## #   ambFinal <dbl>, broodSize <int>, bandTime1 <chr>, bandTime2 <chr>,
## #   handleTime <int>, bandOrder <int>, time1 <chr>, time2 <chr>,
## #   obtainTime <chr>, date <chr>, julDate <int>, mass <int>, tailLength <int>,
## #   tarsus <dbl>, wingChord <int>, sevenPrim <dbl>, expSevenPrim <dbl>,
## #   billNT <dbl>, billWidth <dbl>, billDepth <dbl>, TEC <dbl>, head <dbl>,
## #   skull <dbl>, timeOut1 <int>, timeOut2 <int>, obt1Max <dbl>, obt1Min <dbl>,
## #   obt1Max.1 <dbl>, obt1Min.1 <dbl>, t1MaxMinusHour <dbl>,
## #   t1MinMinusHour <dbl>, t1MeanMinusHour <dbl>, t1MaxAtBand <dbl>,
## #   t1MinAtBand <dbl>, t1MeanAtBand <dbl>, futureBreeder <int>,
## #   broodSize.1 <int>, familyProducedBreeder <int>, comments <chr>

For these duplicate records, one record for each individual is missing time 2 measurements. Therefore, I am keeping only the complete record for each individual. Removed = 42 ROWA04 (row 137), 53 ROWA04 (row 138), 64 ROWA04 (row 139), 75 ROWA04 (row 140)

mc <- mc[-c(62,67,72,78),]#N=134
#mc %>% get_dupes() #double check...yep, no duplicates

Create subsets of mc for females (N=80) and males (N=54) only

#females only
female.mc <- mc %>% 
  filter(sex == "F") #N=80
#males only
male.mc <- mc %>% 
  filter(sex == "M") #N=54


Read in nestling morphometrics data (nm)


Nestling morphometrics are being used here to boost sample size for calculating mass/tarsus residuals
Replace zeros with NAs, sort and filter by age (23-27)

nm=tibble(read.csv("nestlingMorph.csv", h=T))
#replace zeros with NAs in morphometrics 
nm[nm==0] <- NA
#remove nestlingMeasurementNumber
nm <- nm[,-1]
#sort by year > nestName > id
nm <- nm %>% arrange(year, nestName ,id)
#filter to include only ages 23-27
nm <- nm %>% filter(between(age, 23, 27)) 


Create subset of nm including only sexed birds (N=503; females=268; males=235)

#subset nm to include only sexed birds and remove NAs from mass & tarsus
sexed.nm <- nm %>% drop_na(sex, tarsus, mass) #N=503
#make sex a factor rather than character
sexed.nm$sex <- as.factor(sexed.nm$sex)
#females only 
female.nm <- sexed.nm %>% 
  filter(sex == "F") #N=268
#males only
male.nm <- sexed.nm %>% 
  filter(sex == "M") #N=235


Plot–mass distribution by sex


Only includes sexed individuals (N=503)

sexed.nm %>% 
  ggplot(aes(mass, fill=sex))+
  geom_histogram(binwidth = 10)+
  scale_fill_manual(values = c("#1B9E77", "#D73027"),
                    name = "",
                    labels = c("Female","Male"))+
  annotate(geom = "text", x=375, y=29, label="N=268")+
  annotate(geom = "text", x=390, y=5, label="N=235")


Plot–tarsus distribution by sex


Only includes sexed individuals (N=503)

sexed.nm %>% 
  ggplot(aes(tarsus, fill=sex))+
  geom_histogram(binwidth = 0.5)+
  scale_fill_manual(values = c("#1B9E77", "#D73027"),
                    name = "",
                    labels = c("Female","Male"))+
  annotate(geom = "text", x=59, y=28, label="N=268")+
  annotate(geom = "text", x=60, y=5, label="N=235")


Plot–mass by tarsus with sex filter (N=503)

sexed.nm %>% 
  ggplot(aes(x=tarsus,y=mass,color=sex))+
  geom_point(alpha=0.5)+
  geom_smooth()+
  scale_color_manual(values = c("#1B9E77", "#D73027"),
                    name = "",
                    labels = c("Female","Male"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'


Create the female mass~tarsus residual index

femaleIndex.lm <- lm(mass~tarsus, female.nm)
summary(femaleIndex.lm)
## 
## Call:
## lm(formula = mass ~ tarsus, data = female.nm)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -106.011  -16.793    2.398   18.162   96.341 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -400.1482    39.9747  -10.01   <2e-16 ***
## tarsus        13.0989     0.6907   18.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 31.09 on 266 degrees of freedom
## Multiple R-squared:  0.5748, Adjusted R-squared:  0.5732 
## F-statistic: 359.6 on 1 and 266 DF,  p-value: < 2.2e-16


Check residual distributions

*Q-Q plot looks a little non-normal, probably seeing the effects of reduced sample size by separating sexes.

plot(femaleIndex.lm)


Plot–female mass~tarsus

female.nm %>% 
  ggplot(aes(x=tarsus, y=mass, color=as.factor(age)))+
  geom_point(alpha = 0.5)+
  geom_smooth(aes(group=1))+
  scale_color_brewer(palette = "Set1")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'


Create male mass~tarsus residual index


maleIndex.lm <- lm(mass~tarsus, male.nm)
summary(maleIndex.lm)
## 
## Call:
## lm(formula = mass ~ tarsus, data = male.nm)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -112.780  -23.824    1.778   26.411  118.573 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -367.7149    50.4700  -7.286 4.91e-12 ***
## tarsus        12.6580     0.8477  14.932  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 38.67 on 233 degrees of freedom
## Multiple R-squared:  0.489,  Adjusted R-squared:  0.4868 
## F-statistic:   223 on 1 and 233 DF,  p-value: < 2.2e-16


Plot residuals

plot(maleIndex.lm)


Plot–male tarsus~mass

male.nm %>% 
  ggplot(aes(x=tarsus, y=mass, color=as.factor(age)))+
  geom_point(alpha = 0.5)+
  geom_smooth(aes(group=1))+
  scale_color_brewer(palette = "Set1")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'


Create dataframes containing id and residuals

#male residuals pulled from lm() output
male.nm$resids <- maleIndex.lm$residuals
#female residuals pulled from lm() output
female.nm$resids <- femaleIndex.lm$residuals

#dataframes containing male and female ids and residuals for the join with mc
joinMaleResidual <- data.frame(id = male.nm$id,
                               resids = male.nm$resids)
joinFemaleResidual <- data.frame(id = female.nm$id,
                                 resids = female.nm$resids)


Join residuals with male and female mouth color data by common id

#left join with female mc
newFemale.mc <- left_join(female.mc,joinFemaleResidual, by="id")
female.mc$resids <- newFemale.mc$resids

#left join with male mc
newMale.mc <- left_join(male.mc,joinMaleResidual, by="id")
male.mc$resids <- newMale.mc$resids

#left join with both sexes (full mc) 
bothSexResid <- data.frame(rbind(joinMaleResidual,joinFemaleResidual))
new.mc <- left_join(mc,bothSexResid, by="id")
mc$resids <- new.mc$resids


Distribution of residuals for both sexes combined

mc %>%
  ggplot(aes(x=resids, fill=sex))+
  geom_histogram(binwidth = 10)+
  scale_fill_manual(values = c("#1B9E77", "#D73027"),
                    name = "",
                    labels = c("Female","Male"))


Calculate mass/tarsus ratio and append a ratio column to sexed.nm

sexed.nm$ratio <- sexed.nm$mass/sexed.nm$tarsus


Permutation test to assess independence of male and female ratios

reference: https://stats.stackexchange.com/questions/23152/test-for-significant-difference-in-ratios-of-normally-distributed-random-variabl

independence_test(ratio~sex, data = sexed.nm)
## 
##  Asymptotic General Independence Test
## 
## data:  ratio by sex (F, M)
## Z = -4.6465, p-value = 3.376e-06
## alternative hypothesis: two.sided


This result doesn’t indicate the direction of the relationship, but simply indicates that the female ratio is statistically different than the male ratio. This suggests that modeling male and female mouth color separately is likely best.

Plotting mass/tarsus ratio by sex

sexed.nm %>% 
  ggplot(aes(x=sex, y=ratio, color=sex))+
  geom_boxplot()+
  ylab("Mass/tarsus ratio")+
  scale_color_manual(values = c("#1B9E77", "#D73027"),
                    name = "",
                    labels = c("Female","Male"))+
  geom_jitter(alpha=0.2)


Examine whether brood size affects body temp

Plotting body temp by brood size

bodyTempByBroodSize.plot <- mc %>% 
  ggplot(aes(x=broodSize,y=bodTemp1, 
             group=as.factor(broodSize), color=as.factor(broodSize),
             label=id))+
  geom_boxplot()+
  geom_jitter()+
  scale_color_brewer(palette = "Dark2")
ggplotly(bodyTempByBroodSize.plot)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).


Contrary to our expectation, it looks like the largest broods (6 nestlings) is significantly lower in body temp on average, but it’s the smallest sample. Probably shouldn’t try to infer much, but at least we aren’t seeing big differences across brood sizes on the whole.


summary(lm(bodTemp1~as.factor(broodSize), data = mc))
## 
## Call:
## lm(formula = bodTemp1 ~ as.factor(broodSize), data = mc)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.16111 -0.57188  0.04412  0.54412  2.02812 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           40.61667    0.37826 107.379  < 2e-16 ***
## as.factor(broodSize)2 -0.55556    0.43677  -1.272  0.20571    
## as.factor(broodSize)3  0.23922    0.41028   0.583  0.56089    
## as.factor(broodSize)4  0.01754    0.40702   0.043  0.96569    
## as.factor(broodSize)5 -0.24479    0.41219  -0.594  0.55365    
## as.factor(broodSize)6 -1.51667    0.56104  -2.703  0.00781 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9265 on 127 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1474, Adjusted R-squared:  0.1139 
## F-statistic: 4.393 on 5 and 127 DF,  p-value: 0.001009


Create mass/tarsus ratio column in mc for use in models

mc$ratio <- mc$mass/mc$tarsus


Distribution of mass/tarsus ratio for both sexes combined

mc %>%
  ggplot(aes(x=ratio, fill=sex))+
  geom_histogram(binwidth = 0.4)+
  scale_fill_manual(values = c("#1B9E77", "#D73027"),
                    name = "",
                    labels = c("Female","Male"))

#hist(mc$ratio)


SATURATION MODELS


Sat 1 models with sexes combined


Sat1 ~ mass/tarsus ratio model

sat1Ratio.mdl <- lmer(sat1 ~ age + ratio + sex + 
                        bodTemp1 + ambTemp1 + timeOut1 + (1|family), 
                    data = mc, REML = FALSE)
summary(sat1Ratio.mdl)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: sat1 ~ age + ratio + sex + bodTemp1 + ambTemp1 + timeOut1 + (1 |  
##     family)
##    Data: mc
## 
##      AIC      BIC   logLik deviance df.resid 
##    833.4    857.0   -407.7    815.4       93 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.4104 -0.5758  0.1540  0.5680  2.1224 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  family   (Intercept) 127.5    11.29   
##  Residual             107.2    10.35   
## Number of obs: 102, groups:  family, 34
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 159.9507    71.5024   2.237
## age          -3.4562     1.6174  -2.137
## ratio         4.9085     2.4057   2.040
## sexM         -4.0191     2.6334  -1.526
## bodTemp1      0.2575     1.7540   0.147
## ambTemp1      0.2734     0.5476   0.499
## timeOut1      0.1417     0.1516   0.935
## 
## Correlation of Fixed Effects:
##          (Intr) age    ratio  sexM   bdTmp1 ambTm1
## age      -0.427                                   
## ratio     0.101 -0.098                            
## sexM     -0.153 -0.037 -0.223                     
## bodTemp1 -0.795 -0.156 -0.261  0.198              
## ambTemp1  0.134  0.225  0.048  0.061 -0.423       
## timeOut1 -0.074 -0.123  0.095  0.003  0.100 -0.114

Plot ratio model

plot_model(sat1Ratio.mdl,
           title = "Mass/tarsus ratio model for saturation 1",
           bpe = "median",
           vline.color = "black",
           show.values = TRUE, 
           value.offset = .3,
           line.size = 1.2)


Sat1 ~ mass/tarsus residual model

sat1Residual.mdl <- lmer(sat1 ~ age + resids + sex + 
                           bodTemp1 + ambTemp1 + timeOut1 + (1|family), 
                    data = mc, REML = FALSE)
summary(sat1Residual.mdl)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: sat1 ~ age + resids + sex + bodTemp1 + ambTemp1 + timeOut1 +  
##     (1 | family)
##    Data: mc
## 
##      AIC      BIC   logLik deviance df.resid 
##    833.8    857.4   -407.9    815.8       93 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.44228 -0.52381  0.09588  0.61258  2.22548 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  family   (Intercept) 145.9    12.08   
##  Residual             103.0    10.15   
## Number of obs: 102, groups:  family, 34
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept) 172.10735   73.06992   2.355
## age          -3.76360    1.70386  -2.209
## resids        0.08698    0.04909   1.772
## sexM         -2.19575    2.55334  -0.860
## bodTemp1      1.03847    1.67983   0.618
## ambTemp1     -0.02192    0.56075  -0.039
## timeOut1      0.12343    0.15371   0.803
## 
## Correlation of Fixed Effects:
##          (Intr) age    resids sexM   bdTmp1 ambTm1
## age      -0.471                                   
## resids    0.177 -0.175                            
## sexM     -0.105 -0.081  0.124                     
## bodTemp1 -0.781 -0.164 -0.059  0.139              
## ambTemp1  0.086  0.246 -0.117  0.056 -0.408       
## timeOut1 -0.086 -0.108  0.016  0.035  0.132 -0.105


Plot residual model

plot_model(sat1Residual.mdl,
           title = "Mass/tarsus residual model for saturation 1",
           bpe = "median",
           vline.color = "black",
           show.values = TRUE, 
           value.offset = .3,
           line.size = 1.2)


Now let’s compare the fit of ratio and residual models

ratioOrResid.mdls <- c(sat1Ratio.mdl, sat1Residual.mdl)
ratioOrResid.modnames <- c("Ratio", "Residual")
confset(ratioOrResid.mdls, modnames = ratioOrResid.modnames, method = "ordinal")
## 
## Confidence set for the best model
## 
## Method:   ordinal ranking based on delta AIC
## 
## Models with substantial weight:
##          K   AICc Delta_AICc AICcWt
## Ratio    9 835.33       0.00   0.56
## Residual 9 835.78       0.45   0.44
## 
## 
## Models with some weight:
##      K AICc Delta_AICc AICcWt
## 
## 
## Models with little weight:
##      K AICc Delta_AICc AICcWt
## 
## 
## Models with no weight:
##      K AICc Delta_AICc AICcWt


Using AICc Weight (AICcWt), the confset() function calculates the probability of each model (i.e. each hypothesis) given the data and the other candidate models. So in this case, we have more confidence in the ratio models ability to explain the variation in the data, even though their AICc values are only slightly different.


sat1NoSex.mdl <- lmer(sat1 ~ age + resids + bodTemp1 + ambTemp1 + timeOut1 + (1|family), 
                    data = mc, REML = FALSE)
sat1NoSex.mdl2 <- lmer(sat1 ~ age + resids + bodTemp1 * ambTemp1 + (1|family), 
                    data = mc, REML = FALSE)
sat1NoSex.mdl3 <- lmer(sat1 ~ age + resids + bodTemp1 + (1|family), 
                    data = mc, REML = FALSE)


In the following model, we’re using the difference between body temp and 41 degrees, which is the mean body temp for birds > 28 days old. We chose this age range since these mature nestlings should have had an easier time maintaining an ‘ideal’ body temp.

#calculate the difference between each birds body temp and 41 degrees (mean of birds >28 days of age)
mc$tempDiff <- mc$bodTemp1-41
#model including tempDiff
diffTemp.mdl <- lmer(sat1 ~ age + resids + tempDiff + (1|family), data = mc, REML = FALSE)
summary(diffTemp.mdl)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: sat1 ~ age + resids + tempDiff + (1 | family)
##    Data: mc
## 
##      AIC      BIC   logLik deviance df.resid 
##   1077.2   1094.6   -532.6   1065.2      127 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2973 -0.5197  0.0412  0.5876  2.1990 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  family   (Intercept) 115.0    10.72   
##  Residual             113.3    10.65   
## Number of obs: 133, groups:  family, 44
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept) 219.14291   35.97746   6.091
## age          -3.90227    1.41763  -2.753
## resids        0.10560    0.03517   3.003
## tempDiff      0.80103    1.27874   0.626
## 
## Correlation of Fixed Effects:
##          (Intr) age    resids
## age      -0.998              
## resids    0.069 -0.069       
## tempDiff  0.061 -0.043 -0.146
plot_model(diffTemp.mdl,
           title = "Both sexes saturation 1",
           bpe = "median",
           vline.color = "black",
           show.values = TRUE, 
           value.offset = .3,
           line.size = 1.2)


# AIC(sat1Sex.mdl, sat1NoSex.mdl)
# sexOrNo <- c(sat1Sex.mdl, sat1NoSex.mdl, ratioSex.mdl, sat1NoSex.mdl2, sat1NoSex.mdl3)
# modname <- c("sex", "no", "ratio", "noTimeOut", "noAmbTemp")
# confset(sexOrNo, modnames = modname, method = "ordinal")


HUE MODELS

hue1.mdl <- lmer(hue1*100 ~ age + ratio + sex + bodTemp1 + ambTemp1 + timeOut1 + (1|family), 
                    data = mc, REML = FALSE)


plot_model(hue1.mdl,
           title = "Ratio hue 1",
           bpe = "median",
           vline.color = "black",
           show.values = TRUE, 
           value.offset = .3,
           line.size = 1.2)